import sys
sys.path.append("..")
import numpy as np
import matplotlib.pyplot as plt
from common.util import preprocess, create_co_matrix, ppmi

def main():
    """Plot the words of a toy sentence using the first two SVD components.

    Pipeline: the sentence is run through ``preprocess``, a co-occurrence
    matrix is built with ``create_co_matrix`` (``window_size=1``), the result
    is passed through ``ppmi``, and the output is factorized with
    ``np.linalg.svd``. Each word is then drawn at the first two columns of
    its row in the left singular-vector matrix.

    NOTE(review): the helpers come from ``common.util`` and are not visible
    here; their exact return shapes are assumed from how they are used below
    (``word_to_id`` maps each word to a row index) -- confirm against that
    module.
    """
    text = 'You say goodbye and I say hello.'
    # The third return value (presumably the id -> word mapping) is not
    # needed for plotting.
    corpus, word_to_id, _ = preprocess(text)
    vocab_size = len(word_to_id)

    # Co-occurrence matrix, then its PPMI transform.
    C = create_co_matrix(corpus, vocab_size, window_size=1)
    W = ppmi(C)

    # Dimensionality reduction via singular value decomposition. Only the
    # left singular vectors are used; the singular values and Vh are dropped.
    U, _, _ = np.linalg.svd(W)

    # Label every word at its (component 0, component 1) coordinates.
    for word, word_id in word_to_id.items():
        plt.annotate(word, (U[word_id, 0], U[word_id, 1]))
    plt.scatter(U[:, 0], U[:, 1], alpha=0.5)
    plt.show()


if __name__ == "__main__":
    main()


